* This program creates the key dataset for follow-up child data that gets used for subsequent regressions.

// Session setup
clear all
set more off

// Root of the project directory; every data path below is built from this macro
local folder "C:\Users\mlevere\OneDrive - Mathematica\Documents\Projects\Nepal\"

// Start from the endline (follow-up) child file, which already carries the parent information
use "`folder'/Data/follow_up_raw_child_z_rc", clear

// Attach the endline household information; only records matched in both files are kept
merge m:1 www hh sn using "`folder'/Data/follow_up_raw_hh", keep(match) nogenerate

/* Identifier and design variables that keep their original names.
   Every other variable receives an _e suffix so that baseline variables
   can be merged in later without name clashes. */
local norenamevars "district q00_001a vdc vdc_code co_code treatment cash_control info_control vdc_name www hh sn control_vdc info_vdc cash_vdc"

// ds with the not option returns, in r(varlist), every variable outside the list above
quietly ds `norenamevars', not
local endline_vars "`r(varlist)'"
foreach endline_var of varlist `endline_vars' {
    rename `endline_var' `endline_var'_e
}

// From here on the child identifier is called idc_child
rename q00_001a idc_child

// Hold the renamed endline data in a temporary file for the merges that follow
tempfile child_data_panel
save "`child_data_panel'", replace

/* Restrict the endline panel to households that appear in the baseline
   household file (i.e. no new kids in new households). Only the key
   variables are read from the baseline file, and only matched records
   are written back to the temporary panel file. */
local hh_keys "www hh district vdc vdc_code"

use `hh_keys' using "`folder'/Data/baseline_raw_hh.dta", clear
merge 1:m `hh_keys' using "`child_data_panel'", keep(match) nogenerate

save "`child_data_panel'", replace

// Baseline child records: used to attach baseline values where they are available
use "`folder'/Data/baseline_raw_child_z_rc.dta", clear

// Bring in the baseline household variables; only matched records are kept
merge m:1 www hh using "`folder'/Data/baseline_raw_hh.dta", keep(match) nogenerate

// Give the child identifier the same name it has in the endline panel
rename q06_idc idc_child

// Key variables shared with the endline panel; their names are left untouched
local norenamevars "www hh idc_child district vdc vdc_code treatment cash_control info_control vdc_name control_vdc info_vdc cash_vdc"

// Baseline measures carried forward; each one is given a _b suffix
local kvars "_zwei _zlen _zwfl child_tot_months rooms stories hh_land farmland"

keep `norenamevars' `kvars'

foreach base_var of local kvars {
    rename `base_var' `base_var'_b
}

/* Merge the baseline characteristics onto the endline panel.
   The panel was already limited to households present at baseline, so every
   endline record is kept (matched or not) and baseline-only records are discarded.
   The two indicators record whether a baseline record was found for the child. */
merge 1:m `norenamevars' using "`child_data_panel'", keep(using match)
gen has_baseline_data = (_merge == 3)
gen no_baseline_data = (_merge == 2)
drop _merge

/* Mark the survey wave of each VDC.
   No observation is dropped here: every record is kept, and endline_wave is
   set to 1 for records whose district/vdc_name pair is NOT found in
   wave1_vdcs.dta (0 when it is found).
   NOTE(review): presumably the unmatched VDCs are the wave 2 VDCs; any
   restriction to them has to be applied downstream using endline_wave. */
merge m:1 district vdc_name using "`folder'/Data/wave1_vdcs.dta", keep(master match)
gen endline_wave = (_merge == 1)
drop _merge


/* Generate variables that will be used in analysis */
// Declare the survey design: vdc_code is the sampling unit, district the stratum
svyset vdc_code, strata(district)

/* Because we are pooling both new children and children for whom we have data at
   baseline, create mutually exclusive indicators that can serve as controls:
   1) Underweight
   2) Not-underweight
   3) New child
   All three baseline pairs are conditioned on _zwei_b so that, together with
   no_baseline, each set sums to one for every child.
   NOTE(review): because of that, a child with a non-missing _zwei_b but a missing
   _zlen_b or _zwfl_b is coded as stunted_no_b = 1 / wasted_no_b = 1 (a missing
   value compares as larger than any number). Confirm this is intended. */
gen underweight_b = _zwei_b < -2 & _zwei_b ~= .
gen underweight_no_b = _zwei_b >= -2 & _zwei_b ~= .
gen stunted_b = _zlen_b < -2 & _zwei_b ~= .
gen stunted_no_b = _zlen_b >= -2 & _zwei_b ~= .
gen wasted_b = _zwfl_b < -2 & _zwei_b ~= .
gen wasted_no_b = _zwfl_b >= -2 & _zwei_b ~= .
gen no_baseline = _zwei_b == .

/* Endline outcomes: 1 when the z-score is below -2, 0 otherwise, and missing
   when the outcome cannot be determined.
   Each indicator requires its OWN z-score to be non-missing. Previously only
   _zwei_e was checked, so a child with a missing _zlen_e or _zwfl_e was coded 0
   (not stunted / not wasted) instead of missing. The _zwei_e condition is kept
   so that no child is added to the sample on which these outcomes were defined. */
gen underweight_e = _zwei_e < -2 if !missing(_zwei_e)
gen stunted_e = _zlen_e < -2 if !missing(_zlen_e, _zwei_e)
gen wasted_e = _zwfl_e < -2 if !missing(_zwfl_e, _zwei_e)

// 1 when child_occulated_e equals 1 or 2; 0 for every other value, including missing
gen occulation = inlist(child_occulated_e, 1, 2)

/* Child gender/age buckets */
// 1 when child_sex_e equals 1; 0 otherwise, including missing
gen male = (child_sex_e == 1)

/* Age buckets: ages are rounded UP to the next bucket boundary.
     up to 24 months : odd ages move up to the next even month
     25 to 36 months : ages move up to the next multiple of 3
     37 months and up: ages move up to the next multiple of 6
   Ages already on a boundary are unchanged. The three conditions are based on
   the raw age, so they never overlap. */
local age child_tot_months_e

gen child_tot_buckets = `age'
replace child_tot_buckets = `age' + 1 if `age' <= 24 & mod(`age', 2) == 1
replace child_tot_buckets = `age' + (3 - mod(`age', 3)) if inrange(`age', 25, 36) & mod(`age', 3) ~= 0
replace child_tot_buckets = `age' + (6 - mod(`age', 6)) if `age' >= 37 & mod(`age', 6) ~= 0

// One indicator variable per bucket: child_tot_months1, child_tot_months2, ...
tab child_tot_buckets, gen(child_tot_months)

/* Mother ever had schooling/years of schooling */
// Years of schooling: highest grade, set to 0 when school_e is 2
gen schooling_e = cond(school_e == 2, 0, highest_grade_e)

// Recode school_e so that the value 2 becomes 0
replace school_e = 0 if school_e == 2

// 1 when the reported illness type equals 1; 0 otherwise, including missing
gen diarrhea = (child_illness_type_e == 1)

/* Knowledge items. Each indicator is 1 when the response falls in the listed
   value(s) and 0 otherwise; a missing response is coded 0.
   NOTE(review): the listed values are presumably the correct answers of the
   questionnaire - confirm against the survey instrument. */
gen exclusive_6mo_e      = (knowledge_exclusivebreastfeed_e == 2)

gen preg_more_e          = (knowledge_pregnant_food_e == 1)

gen diarrhea_more_e      = (knowledge_breastfeed_diahrrea_e == 3)

gen givebaby_firstmilk_e = (knowledge_firstmilk_e == 1)

gen feed_6t11_e          = inrange(knowledge_6t11_fed_e, 2, 4)

gen snacks_12t24_e       = inrange(knowledge_12t24_snacks_e, 4, 6)

gen iron_3t4_e           = inlist(knowledge_iron_supp_e, 3, 4)

gen deworming_2ndtri_e   = inrange(knowledge_deworming_e, 4, 7)

gen vitaminA_45_e        = (knowledge_vitaminA_e <= 45)

gen anc_checkups_4_e     = (knowledge_anc_checkups_e == 4)

// Knowledge index: simple count of the ten indicators above (range 0 to 10)
gen knowledge_index = givebaby_firstmilk_e + exclusive_6mo_e + feed_6t11_e ///
    + snacks_12t24_e + diarrhea_more_e + iron_3t4_e + deworming_2ndtri_e   ///
    + vitaminA_45_e + preg_more_e + anc_checkups_4_e


/* Recode the listed items so that the value 2 becomes 0.
   NOTE(review): these names carry no _e suffix although every endline variable
   outside the no-rename list was given one earlier in this script; presumably
   they resolve through Stata variable abbreviation. Confirm the full variable
   names, since this fails when varabbrev is off or a name becomes ambiguous. */
foreach item in youngest_threebooks youngest_toys belief_teaching_kids youngest_meals_wfam youngest_convo {
    replace `item' = 0 if `item' == 2
}

/* Drop households that were dropped at baseline */
// Only households found in the baseline flag file are kept
merge m:1 www hh using "`folder'/Data/baseline_flag_hh.dta", keep(match) nogenerate

// Keep a household only when neither baseline flag equals 1
keep if flag_spending != 1 & flag_missingwomen != 1

// Remove every variable whose name starts with flag
drop flag*


/* Variable labels.
   The control-arm indicator is referenced by its full name, control_vdc.
   It was previously written as "control", which only works through variable
   abbreviation and breaks when varabbrev is off or when another variable
   starting with "control" exists. */

// Treatment arms
label variable control_vdc "Control"
label variable info_vdc "Info Only"
label variable cash_vdc "Info + Cash"

// ASQ domain scores
label variable asq_communication_e "Communication"
label variable asq_grossmotor_e "Gross Motor"
label variable asq_finemotor_e "Fine Motor"
label variable asq_personalsocial_e "Personal Social"
label variable asq_problemsolving_e "Problem Solving"

// Child anthropometrics
label variable underweight_e "Underweight"
label variable stunted_e "Stunted"
label variable wasted_e "Wasted"
label variable _zwei_e "Weight-for-Age"
label variable _zlen_e "Height-for-Age"
label variable _zwfl_e "Weight-for-Height"

// Household characteristics
label variable cals_perperson_e "Household Calories Per Person (Endline)"
label variable num_hh_members_e "Number of Household Members"
label variable hh_head_male_e "HH Head Is Male"
label variable hh_head_age_e "HH Head Age"

// Child characteristics and health
label variable male "Child is Male"
label variable child_sick_e "Child Sick in Past Month"
label variable occulation "Child Is Or Will Be Fully Occulated"
label variable diarrhea "Had Diarrhea Past Month"


// Declare the survey design: vdc_code is the sampling unit, district the stratum
svyset vdc_code, strata(district)

/* Establish outliers with the flag variable */

/* Replace the flag on HH Assets = 1 if (baseline values):
   10 or more rooms used by household
   House with 4 or more stories
   Household land above 10 or farmland above 80
   Missing values are never flagged here.
   */

gen flag_hh_assets = 0
replace flag_hh_assets = 1 if rooms_b >= 10 & rooms_b ~= .
replace flag_hh_assets = 1 if stories_b >= 4 & stories_b ~= .
replace flag_hh_assets = 1 if hh_land_b > 10 & hh_land_b ~= .
replace flag_hh_assets = 1 if farmland_b > 80 & farmland_b ~= .

/* Replace the flag on Calories = 1 if:
   Calories per person exceed 5000
   A missing value compares as larger than any number, so without the explicit
   guard every record with missing calories would be flagged as an outlier.
   The guard mirrors the asset checks above. */
gen flag_cals = 0
replace flag_cals = 1 if cals_perperson_e > 5000 & cals_perperson_e ~= .

/* Replace the flag on Child Anthropometrics = 1 if:
   _zwei_e (labelled Weight-for-Age) is < -5
   _zlen_e (labelled Height-for-Age) is < -6
   Either of those two z-scores is missing
   NOTE(review): the earlier comment described these as weight-for-length and
   height-for-length cutoffs; confirm that _zwei_e and _zlen_e with cutoffs
   -5 and -6 are the intended variables and thresholds. */
gen flag_anth = 0
replace flag_anth = 1 if _zwei_e < -5
replace flag_anth = 1 if _zlen_e < -6
replace flag_anth = 1 if _zwei_e == . | _zlen_e == .


// Overall flag: 1 when any of the three component flags is set
gen flag = flag_hh_assets == 1 | flag_cals == 1 | flag_anth == 1

// Enumerator fixed effects: one indicator per distinct value of interviewer_name_e
tab interviewer_name_e, gen(enumerator_fe)

/* Re-scale ASQ variables so that they have mean 0/standard deviation of 1 in the control group.
   The standardization is done within age bucket. The reference sample is
   unflagged control-VDC children younger than 60 months:
     <domain>_m_c / <domain>_sd_c : bucket mean / SD, filled in for reference-sample rows only
     <domain>_sd  / <domain>_m    : the same values spread to every child under 60 months in the bucket
     asq_<domain>_z               : standardized score, defined only for children under 60 months */
local reference_sample "control_vdc == 1 & child_tot_months_e < 60 & flag == 0"

foreach domain in communication grossmotor finemotor personalsocial problemsolving {
    local raw_score asq_`domain'_e

    bysort child_tot_buckets: egen `domain'_m_c = mean(`raw_score') if `reference_sample'
    bysort child_tot_buckets: egen `domain'_sd_c = sd(`raw_score') if `reference_sample'

    // max() ignores missing values, so it copies the single bucket-level value to all rows
    bysort child_tot_buckets: egen `domain'_sd = max(`domain'_sd_c) if child_tot_months_e < 60
    bysort child_tot_buckets: egen `domain'_m = max(`domain'_m_c) if child_tot_months_e < 60

    gen asq_`domain'_z = (`raw_score' - `domain'_m) / `domain'_sd if child_tot_months_e < 60
}

/* Create indexes as in Anderson 2008: ASQ index.
   The index is a weighted sum of the five standardized ASQ scores:
     - the weight of each score is the matching column sum of the inverse covariance matrix
     - the sum is scaled by the reciprocal of the grand sum of that inverse
   The index is missing whenever any of the five z-scores is missing.
   NOTE(review): corrmat is expected to leave the covariance matrix of the listed
   variables in cog_cov; confirm the command is installed and which observations it uses. */
corrmat asq_communication_z asq_grossmotor_z asq_finemotor_z asq_problemsolving_z asq_personalsocial_z, covmat(cog_cov)

// Inverse covariance matrix and a column vector of ones
matrix cog_cov_inv = inv(cog_cov)
matrix ones = J(5,1,1)

// Scaling factor: reciprocal of the sum of all elements of the inverse covariance matrix
matrix weights = inv(ones'*inv(cog_cov)*ones)
local weight = weights[1,1]

// Column sums of the inverse covariance matrix, in the order the variables were passed to corrmat
forvalues col = 1/5 {
    local weightvar`col' = cog_cov_inv[1,`col']+cog_cov_inv[2,`col']+cog_cov_inv[3,`col']+cog_cov_inv[4,`col']+cog_cov_inv[5,`col']
}

gen asq_index = `weight'*(`weightvar1'*asq_communication_z + `weightvar2'*asq_grossmotor_z + `weightvar3'*asq_finemotor_z + `weightvar4'*asq_problemsolving_z + `weightvar5'*asq_personalsocial_z)

// Create indexes as in Anderson 2008: Anthropometric index (all variables should have a "positive" lens first, i.e., not stunted instead of stunted)
// Each "not" indicator is 1 when the outcome equals 0, 0 when it does not, and missing when the outcome is missing
gen notstunted = stunted_e == 0
replace notstunted = . if stunted_e == .

gen notunderweight = underweight_e == 0
replace notunderweight = . if underweight_e == .

gen notwasted = wasted_e == 0
replace notwasted = . if wasted_e == .

/* notsick gets the same missing-value treatment as the three indicators above.
   Without it, a child with missing child_sick_e was coded 0, i.e. treated as sick.
   NOTE(review): this assumes child_sick_e is coded 0 = not sick; if the raw
   coding is 1/2, notsick is never 1 and this component needs recoding first. */
gen notsick = child_sick_e == 0
replace notsick = . if missing(child_sick_e)

/* Standardize each component with the mean and SD of unflagged control-VDC children:
     <var>_m_c / <var>_sd_c : mean / SD, filled in for the reference rows only
     <var>_m   / <var>_sd   : the same values copied to every row (max ignores missing values)
     <var>_z                : standardized component */
local demean "notstunted notunderweight notwasted notsick"
foreach var of local demean {
    egen `var'_m_c = mean(`var') if control_vdc == 1 & flag == 0
    egen `var'_sd_c = sd(`var') if control_vdc == 1 & flag == 0
    egen `var'_m = max(`var'_m_c)
    egen `var'_sd = max(`var'_sd_c)
    gen `var'_z = (`var'-`var'_m)/`var'_sd
}


/* Weighted sum of the four standardized components: each weight is the matching
   column sum of the inverse covariance matrix, scaled by the reciprocal of the
   grand sum of that inverse. The index is missing when any component is missing.
   NOTE(review): corrmat is expected to leave the covariance matrix of the listed
   variables in anth_cov; confirm the command is installed. */
corrmat notstunted_z notunderweight_z notwasted_z notsick_z, covmat(anth_cov)
mat ones = J(4,1,1)
mat weights = inv(ones'*inv(anth_cov)*ones)
local weight = weights[1,1]
mat anth_cov_inv = inv(anth_cov)
foreach col of numlist 1/4 {
    local weightvar`col' = anth_cov_inv[1,`col']+anth_cov_inv[2,`col']+anth_cov_inv[3,`col']+anth_cov_inv[4,`col']
}
gen anth_index = `weight'*(`weightvar1'*notstunted_z + `weightvar2'*notunderweight_z + `weightvar3'*notwasted_z + `weightvar4'*notsick_z)

// Write the final child-level analysis dataset
save "`folder'/Data/child_follow_up_reg_data", replace


